from app import db, login_manager
from flask_login import UserMixin
from werkzeug.security import generate_password_hash, check_password_hash
from sqlalchemy.dialects.mysql import LONGTEXT
import json
from app.utils import get_beijing_time
from flask import current_app

@login_manager.user_loader
def load_user(user_id):
    try:
        if user_id is None:
            return None
        return User.query.get(int(user_id))
    except (ValueError, TypeError) as e:
        # 记录错误但不抛出异常，返回None让Flask-Login处理
        current_app.logger.error(f"Error loading user {user_id}: {e}")
        return None

class User(UserMixin, db.Model):
    __tablename__ = 'user'
    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(64), index=True, unique=True, nullable=False)
    password_hash = db.Column(db.String(256), nullable=False)
    created_at = db.Column(db.DateTime, default=get_beijing_time)

    model = db.relationship('AIModel', back_populates='owner', lazy='dynamic', cascade="all, delete-orphan")
    chat_sessions = db.relationship('ChatSession', back_populates='user', lazy='dynamic', cascade="all, delete-orphan")
    evaluation_effectiveness = db.relationship('ModelEvaluation', back_populates='user', lazy='dynamic', cascade="all, delete-orphan")

    def set_password(self, password):
        self.password_hash = generate_password_hash(password)

    def check_password(self, password):
        return check_password_hash(self.password_hash, password)

    def __repr__(self):
        return f'<User {self.username}>'

class AIModel(db.Model):
    __tablename__ = 'model'
    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=True)
    
    display_name = db.Column(db.String(100), nullable=False)
    model_type = db.Column(db.String(50), nullable=False, default='openai_compatible') 
    api_base_url = db.Column(db.String(255), nullable=False)
    model_identifier = db.Column(db.String(100), nullable=False)
    encrypted_api_key = db.Column(db.String(512), nullable=True)
    provider_name = db.Column(db.String(100), nullable=True)
    is_system_model = db.Column(db.Boolean, default=False, nullable=False)
    system_prompt = db.Column(db.Text, nullable=True, default="You are a helpful assistant.")
    default_temperature = db.Column(db.Float, nullable=True, default=0.7)
    notes = db.Column(db.Text, nullable=True)
    is_validated = db.Column(db.Boolean, default=False)
    created_at = db.Column(db.DateTime, default=get_beijing_time)
    updated_at = db.Column(db.DateTime, default=get_beijing_time, onupdate=get_beijing_time)

    owner = db.relationship('User', back_populates='model')
    chat_messages = db.relationship('ChatMessage', back_populates='model', lazy='dynamic')
    evaluations = db.relationship('ModelEvaluation', foreign_keys='ModelEvaluation.model_id', back_populates='model', lazy='dynamic')
    judge_evaluations = db.relationship('ModelEvaluation', foreign_keys='ModelEvaluation.judge_model_id', back_populates='judge_model', lazy='dynamic')

    def __repr__(self):
        return f'<AIModel {self.display_name} ({self.model_identifier})>'

class ChatSession(db.Model):
    __tablename__ = 'chat_session'
    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    session_name = db.Column(db.String(150), nullable=True)
    created_at = db.Column(db.DateTime, default=get_beijing_time)
    updated_at = db.Column(db.DateTime, default=get_beijing_time, onupdate=get_beijing_time)
    config_data = db.Column(db.JSON, nullable=True) # 存储会话的配置数据，例如模型配置

    user = db.relationship('User', back_populates='chat_sessions')
    messages = db.relationship('ChatMessage', back_populates='session', lazy='dynamic', cascade="all, delete-orphan")

    def __repr__(self):
        return f'<ChatSession {self.id} by User {self.user_id}>'

class ChatMessage(db.Model):
    __tablename__ = 'chat_message'
    id = db.Column(db.Integer, primary_key=True)
    session_id = db.Column(db.Integer, db.ForeignKey('chat_session.id'), nullable=False)
    model_id = db.Column(db.Integer, db.ForeignKey('model.id'), nullable=True) 
    role = db.Column(db.String(20), nullable=False)  # 'user', 'assistant', 'system'
    content = db.Column(db.Text, nullable=False)
    timestamp = db.Column(db.DateTime, default=get_beijing_time)
    settings_snapshot = db.Column(db.JSON, nullable=True) 

    session = db.relationship('ChatSession', back_populates='messages')
    model = db.relationship('AIModel', back_populates='chat_messages')

    def __repr__(self):
        return f'<ChatMessage {self.id} in Session {self.session_id} by {self.role}>' 

# 用于 Dataset 和 DatasetCategory 的多对多关联表
dataset_categories_association = db.Table('dataset_category',
    db.Column('dataset_id', db.Integer, db.ForeignKey('dataset.id'), primary_key=True),
    db.Column('category_id', db.Integer, db.ForeignKey('category.id'), primary_key=True)
)

class DatasetCategory(db.Model):
    __tablename__ = 'category'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), nullable=False, unique=True) # 分类名称

    # 反向关系，方便从 Category 查找到所有相关的 Dataset
    # datasets = db.relationship("Dataset", secondary=dataset_categories_association, back_populates="categories")

    def __repr__(self):
        return f'<DatasetCategory {self.name}>'

class Dataset(db.Model):
    __tablename__ = 'dataset'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(200), nullable=False, unique=True)
    description = db.Column(db.Text, nullable=True)
    publish_date = db.Column(db.String(50), nullable=True)
    source = db.Column(db.String(100), nullable=True)
    download_url = db.Column(db.String(255), nullable=True)
    dataset_info = db.Column(LONGTEXT, nullable=True)
    
    # 新增字段
    dataset_type = db.Column(db.String(50), nullable=False, default='系统', server_default='系统')
    visibility = db.Column(db.String(50), nullable=False, default='公开', server_default='公开')
    format = db.Column(db.String(50), nullable=False, default='QA', server_default='QA')
    jinja2_template = db.Column(LONGTEXT, nullable=True)  # 修改为存储模板内容
    is_active = db.Column(db.Boolean, nullable=False, default=True, server_default='1')
    
    # 多对多关系到 DatasetCategory
    categories = db.relationship("DatasetCategory", 
                                 secondary=dataset_categories_association,
                                 backref=db.backref("dataset", lazy="dynamic"),
                                 lazy="select") # 使用 select 加载模式，避免N+1查询，也可以用 'joined'

    # 评估关系
    evaluations = db.relationship('ModelEvaluationDataset', back_populates='dataset', lazy='dynamic')

    def __repr__(self):
        return f'<Dataset {self.name}>'

# 模型评估相关数据模型
class ModelEvaluation(db.Model):
    """模型评估记录"""
    __tablename__ = 'evaluation_effectiveness'
    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    model_id = db.Column(db.Integer, db.ForeignKey('model.id'), nullable=False)
    judge_model_id = db.Column(db.Integer, db.ForeignKey('model.id'), nullable=True)
    name = db.Column(db.String(150), nullable=True)
    temperature = db.Column(db.Float, nullable=False, default=0.7)
    max_tokens = db.Column(db.Integer, nullable=False, default=2048)
    top_k = db.Column(db.Integer, nullable=True, default=20)  # 新增top_k字段
    top_p = db.Column(db.Float, nullable=True, default=0.8)  # 新增top_p字段
    judge_worker_num = db.Column(db.Integer, nullable=True, default=1)  # 新增并发数字段
    eval_batch_size = db.Column(db.Integer, nullable=True, default=4)  # 新增评估并发数字段
    status = db.Column(db.String(20), nullable=False, default='pending')
    created_at = db.Column(db.DateTime, default=get_beijing_time)
    completed_at = db.Column(db.DateTime, nullable=True)
    result_summary = db.Column(db.JSON, nullable=True)
    limit = db.Column(db.Integer, nullable=True)
    user = db.relationship('User', back_populates='evaluation_effectiveness')
    model = db.relationship('AIModel', foreign_keys=[model_id], back_populates='evaluations')
    judge_model = db.relationship('AIModel', foreign_keys=[judge_model_id], back_populates='judge_evaluations')
    datasets = db.relationship('ModelEvaluationDataset', back_populates='evaluation', lazy='dynamic', cascade="all, delete-orphan")
    evaluation_results = db.relationship('ModelEvaluationResult', back_populates='evaluation', lazy='dynamic', cascade="all, delete-orphan")
    
    def __repr__(self):
        return f'<ModelEvaluation {self.id} for Model {self.model_id}>'

class ModelEvaluationDataset(db.Model):
    """模型评估中使用的数据集"""
    __tablename__ = 'evaluation_effectiveness_dataset'
    evaluation_id = db.Column(db.Integer, db.ForeignKey('evaluation_effectiveness.id'), primary_key=True)
    dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id'), primary_key=True)
    subset = db.Column(db.String(100), nullable=True)
    split = db.Column(db.String(100), nullable=True)
    
    evaluation = db.relationship('ModelEvaluation', back_populates='datasets')
    dataset = db.relationship('Dataset', back_populates='evaluations')
    
    def __repr__(self):
        return f'<ModelEvaluationDataset {self.dataset_id} for Evaluation {self.evaluation_id}>'

class ModelEvaluationResult(db.Model):
    """模型评估的详细结果"""
    __tablename__ = 'evaluation_effectiveness_result'
    id = db.Column(db.Integer, primary_key=True)
    evaluation_id = db.Column(db.Integer, db.ForeignKey('evaluation_effectiveness.id'), nullable=False)
    dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id'), nullable=False)  # 改为dataset_id并添加外键
    question = db.Column(db.Text, nullable=False)
    reference_answer = db.Column(db.Text, nullable=True)
    model_answer = db.Column(db.Text, nullable=False)
    score = db.Column(db.Float, nullable=True)
    feedback = db.Column(db.Text, nullable=True)
    
    evaluation = db.relationship('ModelEvaluation', back_populates='evaluation_results')
    dataset = db.relationship('Dataset', backref=db.backref('evaluation_results', lazy='dynamic'))  # 添加与Dataset的关系
    
    def __repr__(self):
        return f'<ModelEvaluationResult {self.id} for Evaluation {self.evaluation_id}>' 

# 新增：模型性能评估任务模型
class PerformanceEvalTask(db.Model):
    __tablename__ = 'model_efficiency'
    id = db.Column(db.Integer, primary_key=True)
    # 关联到用户，确保用户隔离
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    # 关联到AIModel，如果需要直接选择已注册的模型
    # model_id = db.Column(db.Integer, db.ForeignKey('model.id'), nullable=False) 
    model_name = db.Column(db.String(150), nullable=False)
    dataset_name = db.Column(db.String(150), nullable=False)
    concurrency = db.Column(db.Integer, nullable=False)
    num_requests = db.Column(db.Integer, nullable=False)
    
    status = db.Column(db.String(50), nullable=False, default='pending')
    created_at = db.Column(db.DateTime, default=get_beijing_time)
    started_at = db.Column(db.DateTime, nullable=True)
    completed_at = db.Column(db.DateTime, nullable=True)
    
    # 存储 evalscope.perf.main.run_perf_benchmark 的输出
    summary_results = db.Column(db.Text, nullable=True)
    percentile_results = db.Column(db.Text, nullable=True)
    raw_output = db.Column(db.Text, nullable=True)
    error_message = db.Column(db.Text, nullable=True)

    # 关联到用户
    user = db.relationship('User', backref=db.backref('model_efficiency', lazy='dynamic'))

    # 如果模型是从 AIModel 表中选择的
    # model = db.relationship('AIModel', backref=db.backref('model_efficiency', lazy='dynamic'))

    def __repr__(self):
        return f'<PerformanceEvalTask {self.id} for model {self.model_name} on dataset {self.dataset_name}>'

def init_database_data():
    """
    初始化数据库数据
    只在数据库为空时执行，避免重复插入
    """
    from app import db
    
    try:
        # 检查是否已有数据集分类数据
        if DatasetCategory.query.count() == 0:
            current_app.logger.info("正在初始化数据集分类...")
            categories = [
                (21, '函数调用'),
                (9, '创作'),
                (5, '多模态'),
                (19, '多轮对话'),
                (1, '学科'),
                (17, '安全'),
                (13, '推理'),
                (11, '数学'),
                (7, '理解'),
                (3, '知识'),
                (23, '综合'),
                (15, '语言'),
            ]
            
            for cat_id, cat_name in categories:
                category = DatasetCategory(id=cat_id, name=cat_name)
                db.session.add(category)
            
            try:
                db.session.commit()
                current_app.logger.info("数据集分类初始化完成")
            except Exception as e:
                db.session.rollback()
                current_app.logger.error(f"数据集分类初始化失败: {e}")
        
        # 检查是否已有系统数据集数据
        if Dataset.query.count() == 0:
            current_app.logger.info("正在初始化系统数据集...")
            datasets = [
                {
                    'id': 5,
                    'name': 'ceval',
                    'description': 'C-Eval 是一个全面的中文基础模型评估套件。它包含了13948个多项选择题，涵盖了52个不同的学科和四个难度级别。',
                    'publish_date': '2025-05-01',
                    'source': '公开数据集',
                    'download_url': 'modelscope/ceval-exam',
                    'dataset_info': json.dumps({"computer_network": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "computer_network", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 35408, "num_examples": 171, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3799, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2361, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 41568, "size_in_bytes": 1589825}, "operating_system": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "operating_system", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 31146, "num_examples": 179, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3299, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2557, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 37002, "size_in_bytes": 1585259}, "computer_architecture": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "computer_architecture", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 40613, "num_examples": 193, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4149, "num_examples": 21, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2793, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 47555, "size_in_bytes": 1595812}, "college_programming": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "college_programming", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 83541, "num_examples": 342, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 9543, "num_examples": 37, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2882, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 95966, "size_in_bytes": 1644223}, "college_physics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "college_physics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 55731, "num_examples": 176, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 6145, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3824, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 65700, "size_in_bytes": 1613957}, "college_chemistry": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "college_chemistry", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 45798, "num_examples": 224, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4443, "num_examples": 24, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3611, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 53852, "size_in_bytes": 1602109}, "advanced_mathematics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "advanced_mathematics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 50031, "num_examples": 173, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5331, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 7012, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 62374, "size_in_bytes": 1610631}, "probability_and_statistics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "probability_and_statistics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 56740, "num_examples": 166, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5781, "num_examples": 18, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 6769, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 69290, "size_in_bytes": 1617547}, "discrete_mathematics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "discrete_mathematics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 36045, "num_examples": 153, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3424, "num_examples": 16, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2002, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 41471, "size_in_bytes": 1589728}, "electrical_engineer": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "electrical_engineer", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 73731, "num_examples": 339, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 8315, "num_examples": 37, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2180, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 84226, "size_in_bytes": 1632483}, "metrology_engineer": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "metrology_engineer", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 47484, "num_examples": 219, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 6116, "num_examples": 24, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2485, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 56085, "size_in_bytes": 1604342}, "high_school_mathematics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "high_school_mathematics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 41080, "num_examples": 166, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5144, "num_examples": 18, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3552, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 49776, "size_in_bytes": 1598033}, "high_school_physics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "high_school_physics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 61678, "num_examples": 175, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 7266, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2266, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 71210, "size_in_bytes": 1619467}, "high_school_chemistry": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "high_school_chemistry", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 46918, "num_examples": 172, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5625, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2576, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 55119, "size_in_bytes": 1603376}, "high_school_biology": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "high_school_biology", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 55239, "num_examples": 175, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 6105, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2164, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 63508, "size_in_bytes": 1611765}, "middle_school_mathematics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "middle_school_mathematics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 33142, "num_examples": 177, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4897, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3187, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 41226, "size_in_bytes": 1589483}, "middle_school_biology": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "middle_school_biology", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 47264, "num_examples": 192, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5263, "num_examples": 21, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4327, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 56854, "size_in_bytes": 1605111}, "middle_school_physics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "middle_school_physics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 48793, "num_examples": 178, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5279, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3531, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 57603, "size_in_bytes": 1605860}, "middle_school_chemistry": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "middle_school_chemistry", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 47575, "num_examples": 185, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5654, "num_examples": 20, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3866, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 57095, "size_in_bytes": 1605352}, "veterinary_medicine": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "veterinary_medicine", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 39465, "num_examples": 210, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4559, "num_examples": 23, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2362, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 46386, "size_in_bytes": 1594643}, "college_economics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "college_economics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 119734, "num_examples": 497, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 14461, "num_examples": 55, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3673, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 137868, "size_in_bytes": 1686125}, "business_administration": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "business_administration", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 78387, "num_examples": 301, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 9225, "num_examples": 33, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3155, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 90767, "size_in_bytes": 1639024}, "marxism": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "marxism", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 38662, "num_examples": 179, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4251, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2142, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 45055, "size_in_bytes": 1593312}, "mao_zedong_thought": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "mao_zedong_thought", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 56699, "num_examples": 219, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5487, "num_examples": 24, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3349, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 65535, "size_in_bytes": 1613792}, "education_science": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "education_science", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 55753, "num_examples": 270, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5519, "num_examples": 29, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3093, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 64365, "size_in_bytes": 1612622}, "teacher_qualification": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "teacher_qualification", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 107369, "num_examples": 399, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 12220, "num_examples": 44, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3215, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 122804, "size_in_bytes": 1671061}, "high_school_politics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "high_school_politics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 83356, "num_examples": 176, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 8909, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4730, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 96995, "size_in_bytes": 1645252}, "high_school_geography": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "high_school_geography", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 41244, "num_examples": 178, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3985, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2087, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 47316, "size_in_bytes": 1595573}, "middle_school_politics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "middle_school_politics", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 72478, "num_examples": 193, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 7320, "num_examples": 21, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3687, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 83485, "size_in_bytes": 1631742}, "middle_school_geography": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "middle_school_geography", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 23329, "num_examples": 108, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 2641, "num_examples": 12, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2148, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 28118, "size_in_bytes": 1576375}, "modern_chinese_history": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "modern_chinese_history", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 51247, "num_examples": 212, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5188, "num_examples": 23, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2983, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 59418, "size_in_bytes": 1607675}, "ideological_and_moral_cultivation": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "ideological_and_moral_cultivation", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 35315, "num_examples": 172, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3241, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1296, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 39852, "size_in_bytes": 1588109}, "logic": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "logic", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 144246, "num_examples": 204, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 15561, "num_examples": 22, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 5641, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 165448, "size_in_bytes": 1713705}, "law": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "law", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 79782, "num_examples": 221, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 8119, "num_examples": 24, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4142, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 92043, "size_in_bytes": 1640300}, "chinese_language_and_literature": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "chinese_language_and_literature", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 32328, "num_examples": 209, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3446, "num_examples": 23, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1892, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 37666, "size_in_bytes": 1585923}, "art_studies": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "art_studies", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 41227, "num_examples": 298, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4581, "num_examples": 33, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1439, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 47247, "size_in_bytes": 1595504}, "professional_tour_guide": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "professional_tour_guide", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 41231, "num_examples": 266, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4509, "num_examples": 29, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1764, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 47504, "size_in_bytes": 1595761}, "legal_professional": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "legal_professional", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 121985, "num_examples": 215, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 12215, "num_examples": 23, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 6974, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 141174, "size_in_bytes": 1689431}, "high_school_chinese": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "high_school_chinese", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 110347, "num_examples": 178, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 10475, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 5290, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 126112, "size_in_bytes": 1674369}, "high_school_history": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "high_school_history", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 56196, "num_examples": 182, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 6618, "num_examples": 20, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2421, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 65235, "size_in_bytes": 1613492}, "middle_school_history": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "middle_school_history", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 47076, "num_examples": 207, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5990, "num_examples": 22, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2014, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 55080, "size_in_bytes": 1603337}, "civil_servant": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "civil_servant", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 181504, "num_examples": 429, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 21273, "num_examples": 47, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4576, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 207353, "size_in_bytes": 1755610}, "sports_science": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "sports_science", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 32527, "num_examples": 180, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3493, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4182, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 40202, "size_in_bytes": 1588459}, "plant_protection": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "plant_protection", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 31877, "num_examples": 199, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3634, "num_examples": 22, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3726, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 39237, "size_in_bytes": 1587494}, "basic_medicine": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "basic_medicine", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 28820, "num_examples": 175, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 2627, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1825, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 33272, "size_in_bytes": 1581529}, "clinical_medicine": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "clinical_medicine", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 42161, "num_examples": 200, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4167, "num_examples": 22, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1951, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 48279, "size_in_bytes": 1596536}, "urban_and_rural_planner": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "urban_and_rural_planner", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 110377, "num_examples": 418, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 12793, "num_examples": 46, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3166, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 126336, "size_in_bytes": 1674593}, "accountant": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "accountant", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 176917, "num_examples": 443, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 19549, "num_examples": 49, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3414, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 199880, "size_in_bytes": 1748137}, "fire_engineer": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "fire_engineer", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 83611, "num_examples": 282, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 9998, "num_examples": 31, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2209, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 95818, "size_in_bytes": 1644075}, "environmental_impact_assessment_engineer": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "environmental_impact_assessment_engineer", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 84680, "num_examples": 281, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 9186, "num_examples": 31, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2495, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 96361, "size_in_bytes": 1644618}, "tax_accountant": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "tax_accountant", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 174482, "num_examples": 443, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 18932, "num_examples": 49, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4274, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 197688, "size_in_bytes": 1745945}, "physician": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\\n", "citation": "@article{huang2023ceval,\\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \\n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\\n    journal={arXiv preprint arXiv:2305.08322},\\n    year={2023}\\n}\\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": None, "_type": "Value"}, "question": {"dtype": "string", "id": None, "_type": "Value"}, "A": {"dtype": "string", "id": None, "_type": "Value"}, "B": {"dtype": "string", "id": None, "_type": "Value"}, "C": {"dtype": "string", "id": None, "_type": "Value"}, "D": {"dtype": "string", "id": None, "_type": "Value"}, "answer": {"dtype": "string", "id": None, "_type": "Value"}, "explanation": {"dtype": "string", "id": None, "_type": "Value"}}, "post_processed": None, "supervised_keys": None, "task_templates": None, "builder_name": "ceval-exam", "config_name": "physician", "version": {"version_str": "1.0.0", "description": None, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 89801, "num_examples": 443, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 8710, "num_examples": 49, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2033, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": None, "dataset_size": 100544, "size_in_bytes": 1648801}}),
                    'dataset_type': '系统',
                    'visibility': '公开',
                    'format': 'MCQ',
                    'is_active': 1
                },
                {
                    'id': 29,
                    'name': 'iquiz',
                    'description': '智商测试：收集自网络公开的面试题、脑筋急转弯、弱智吧、模型陷阱题等，目前一共40道题目，分成了level 1，2，3（level 1 是基础题，level 2 是中等难度，level 3 是高难度），分别有13，13，14道题目。情商测试：收集自ToMBench，目前一共80道题目，用于测试模型的情商。',
                    'publish_date': '2025-05-01',
                    'source': '公开数据集',
                    'download_url': 'AI-ModelScope/IQuiz',
                    'dataset_info': json.dumps({"IQ": {"features": {"question": {"_type": "Value"}, "choices": {"_type": "Value"}, "answer": {"_type": "Value"}, "level": {"_type": "Value"}}, "splits": {"test": {"name": "test", "dataset_name": "IQuiz"}}}, "EQ": {"features": {"question": {"_type": "Value"}, "choices": {"_type": "Value"}, "answer": {"_type": "Value"}, "level": {"_type": "Value"}}, "splits": {"test": {"name": "test", "dataset_name": "IQuiz"}}}}),
                    'dataset_type': '系统',
                    'visibility': '公开',
                    'format': 'MCQ',
                    'is_active': 1
                },
                {
                    'id': 35,
                    'name': 'chinese_simpleqa',
                    'description': '中文 SimpleQA 是第一个用于评估语言模型回答简短问题的事实性能力的全面中文基准测试。该基准主要具备五个特性（即中文、多样性、高质量、静态性和易评估性',
                    'publish_date': '2025-06-01',
                    'source': '公开数据集',
                    'download_url': 'AI-ModelScope/Chinese-SimpleQA',
                    'dataset_info': json.dumps({"default": {"features": {"id": {"_type": "Value"}, "primary_category": {"_type": "Value"}, "secondary_category": {"_type": "Value"}, "question": {"_type": "Value"}, "answer": {"_type": "Value"}, "urls": {"_type": "Value"}}, "splits": {"train": {"name": "train", "dataset_name": "Chinese-SimpleQA"}}}}),
                    'dataset_type': '系统',
                    'visibility': '公开',
                    'format': 'QA',
                    'is_active': 1
                },
            ]
            
            for dataset_data in datasets:
                dataset = Dataset(**dataset_data)
                db.session.add(dataset)
            
            try:
                db.session.commit()
                current_app.logger.info("系统数据集初始化完成")
            except Exception as e:
                db.session.rollback()
                current_app.logger.error(f"系统数据集初始化失败: {e}")
        
        current_app.logger.info("数据库初始化检查完成")
        
    except Exception as e:
        current_app.logger.error(f"数据库初始化过程中发生错误: {e}")
        # 不抛出异常，让应用继续启动 